//
// Project: Disagreement in science: Missing women



clear all
version 15.1



//
// set locals

// Gender-identification method: names of the author-level variables that
// are totalled within each article in the journal sections below.
local female `"female_genderize"'
local male `"male_genderize"'

// Condition that holds when an author's gender is known.
local known_gender `"female_genderize!=."'




//
// AER

// call data
use "${data}/output/aer_data_gender.dta", clear
drop if month=="May" & year!=2019  // exclude AEA papers and proceedings
drop if year==2020
keep if (comment | research_article)

// Reduce the author-level rows to one row per article, carrying along the
// number of author rows and the per-article totals of the `male' / `female'
// variables.
bysort article_id: egen number_authors = count(article_id)
bysort article_id: egen number_men = total(`male')
bysort article_id: egen number_women = total(`female')
generate number_identified_authors = number_men + number_women
collapse (firstnm) number_men number_women number_authors ///
    number_identified_authors year comment research_article, by(article_id)
generate single_authored = (number_authors == 1)

// Share of solo-authored articles by article type. comment is the only
// regressor, so margins returns one predicted value per level of comment;
// the first column is posted below as comment==0, the second as comment==1.
// articles with at least one male author
regress single_authored i.comment if number_men >= 1
margins comment
matrix m = r(b)
local share_men_regular = m[1,1]
local share_men_comment = m[1,2]
// articles with at least one female author
regress single_authored i.comment if number_women >= 1
margins comment
matrix f = r(b)
local share_women_regular = f[1,1]
local share_women_comment = f[1,2]

// Store the four shares as a small dataset; xvalue is the horizontal bar
// position used in the graph (0/1 for men, 3/4 for women).
tempname results
postfile `results' female comment xvalue fraction_single_author ///
    using "${data}/temporary/single_author_aer.dta", replace
post `results' (0) (0) (0) (`share_men_regular')
post `results' (0) (1) (1) (`share_men_comment')
post `results' (1) (0) (3) (`share_women_regular')
post `results' (1) (1) (4) (`share_women_comment')
postclose `results'

use "${data}/temporary/single_author_aer.dta", clear
format fraction_single_author %9.2f
// Floating-point noise from the regression can leave a value such as
// -2.78e-17 where the share is really 0, which %9.2f labels as -0.00.
// Snap anything within 1e-12 of zero to exactly 0. This replaces an earlier
// multiply-by-(-1) on the female/comment cell, which would have turned a
// genuine positive share negative had the data changed.
replace fraction_single_author = 0 if abs(fraction_single_author) < 1e-12

// Bars for regular articles and comments, plus an invisible scatter layer
// that only contributes the value labels above each bar.
twoway ///
    (bar fraction_single_author xvalue if comment==0, barwidth(0.8) color(gs3%80)) ///
    (scatter fraction_single_author xvalue, msymbol(i) mlabel(fraction_single_author) mlabposition(12) mlabsize(medsmall) mlabcolor(gs3)) ///
    (bar fraction_single_author xvalue if comment==1, barwidth(0.8) color(gs9%80)), ///
    plotregion(style(none)) scheme(s1color) title("") xtitle("") ///
    ytitle("Fraction of articles that are solo-authored") ///
    xlabel(-1.5 `" "' 0.5 `""Articles with at least" "one male author""' 2 `" "' 3.5 `""Articles with at least" "one female author""' 5.5 `" "', noticks labsize(medsmall)) ///
    ylabel(0 .10 `"0.10"' .20 `"0.20"' .30 `"0.30"' .4 `"0.40"' .5 `"0.50"', grid angle(horizontal) labsize(medsmall)) ///
    yscale(r(., .5)) ///
    legend(row(1) size(medsmall) order(1 "regular article" 3 "comment") region(lwidth(none)))
graph export "${output}/single_author_aer.eps", replace
graph export "${output}/single_author_aer.png", replace



//
// ASR

// call data
use "${data}/output/asr_data_gender.dta", clear
keep if comment | research_article

// Reduce the author-level rows to one row per article, carrying along the
// number of author rows and the per-article totals of the `male' / `female'
// variables.
bysort article_id: egen number_authors = count(article_id)
bysort article_id: egen number_men = total(`male')
bysort article_id: egen number_women = total(`female')
generate number_identified_authors = number_men + number_women
collapse (firstnm) number_men number_women number_authors ///
    number_identified_authors year comment research_article, by(article_id)
generate single_authored = (number_authors == 1)

// Share of solo-authored articles by article type. comment is the only
// regressor, so margins returns one predicted value per level of comment;
// the first column is posted below as comment==0, the second as comment==1.
// articles with at least one male author
regress single_authored i.comment if number_men >= 1
margins comment
matrix m = r(b)
local share_men_regular = m[1,1]
local share_men_comment = m[1,2]
// articles with at least one female author
regress single_authored i.comment if number_women >= 1
margins comment
matrix f = r(b)
local share_women_regular = f[1,1]
local share_women_comment = f[1,2]

// Store the four shares as a small dataset; xvalue is the horizontal bar
// position used in the graph (0/1 for men, 3/4 for women).
tempname results
postfile `results' female comment xvalue fraction_single_author ///
    using "${data}/temporary/single_author_asr.dta", replace
post `results' (0) (0) (0) (`share_men_regular')
post `results' (0) (1) (1) (`share_men_comment')
post `results' (1) (0) (3) (`share_women_regular')
post `results' (1) (1) (4) (`share_women_comment')
postclose `results'

use "${data}/temporary/single_author_asr.dta", clear
format fraction_single_author %9.2f

// Bars for regular articles and comments, plus an invisible scatter layer
// that only contributes the value labels above each bar.
twoway ///
    (bar fraction_single_author xvalue if comment==0, barwidth(0.8) color(gs3%80)) ///
    (scatter fraction_single_author xvalue, msymbol(i) mlabel(fraction_single_author) mlabposition(12) mlabsize(medsmall) mlabcolor(gs3)) ///
    (bar fraction_single_author xvalue if comment==1, barwidth(0.8) color(gs9%80)), ///
    plotregion(style(none)) scheme(s1color) title("") xtitle("") ///
    ytitle("Fraction of articles that are solo-authored") ///
    xlabel(-1.5 `" "' 0.5 `""Articles with at least" "one male author""' 2 `" "' 3.5 `""Articles with at least" "one female author""' 5.5 `" "', noticks labsize(medsmall)) ///
    ylabel(0 .10 `"0.10"' .20 `"0.20"' .30 `"0.30"' .4 `"0.40"' .5 `"0.50"', grid angle(horizontal) labsize(medsmall)) ///
    yscale(r(., .5)) ///
    legend(row(1) size(medsmall) order(1 "regular article" 3 "comment") region(lwidth(none)))
graph export "${output}/single_author_asr.eps", replace
graph export "${output}/single_author_asr.png", replace



//
// JAMA

// call data
use "${data}/output/jama_pubmed_data_gender.dta", clear
drop if year==2020
// Full author names from PubMed are not available before 2002; those years
// are already removed by the 2013 cutoff below.
// The Comment & Response section started in July 2013, so everything before
// that month is excluded.
drop if year<2013
drop if year==2013 & inlist(month, "January", "February", "March", "April", "May", "June")
keep if comment | research_article
drop if article_with_etal
drop if strpos(full_name, "Fontanarosa")  // this JAMA editor appeared as first author of letters to the editor

// Reduce the author-level rows to one row per article, carrying along the
// number of author rows and the per-article totals of the `male' / `female'
// variables.
bysort article_id: egen number_authors = count(article_id)
bysort article_id: egen number_men = total(`male')
bysort article_id: egen number_women = total(`female')
generate number_identified_authors = number_men + number_women
collapse (firstnm) number_men number_women number_authors ///
    number_identified_authors year comment research_article, by(article_id)
generate single_authored = (number_authors == 1)

// Share of solo-authored articles by article type. comment is the only
// regressor, so margins returns one predicted value per level of comment;
// the first column is posted below as comment==0, the second as comment==1.
// articles with at least one male author
regress single_authored i.comment if number_men >= 1
margins comment
matrix m = r(b)
local share_men_regular = m[1,1]
local share_men_comment = m[1,2]
// articles with at least one female author
regress single_authored i.comment if number_women >= 1
margins comment
matrix f = r(b)
local share_women_regular = f[1,1]
local share_women_comment = f[1,2]

// Store the four shares as a small dataset; xvalue is the horizontal bar
// position used in the graph (0/1 for men, 3/4 for women).
tempname results
postfile `results' female comment xvalue fraction_single_author ///
    using "${data}/temporary/single_author_jama.dta", replace
post `results' (0) (0) (0) (`share_men_regular')
post `results' (0) (1) (1) (`share_men_comment')
post `results' (1) (0) (3) (`share_women_regular')
post `results' (1) (1) (4) (`share_women_comment')
postclose `results'

use "${data}/temporary/single_author_jama.dta", clear
format fraction_single_author %9.2f

// Bars for regular articles and comments, plus an invisible scatter layer
// that only contributes the value labels above each bar.
twoway ///
    (bar fraction_single_author xvalue if comment==0, barwidth(0.8) color(gs3%80)) ///
    (scatter fraction_single_author xvalue, msymbol(i) mlabel(fraction_single_author) mlabposition(12) mlabsize(medsmall) mlabcolor(gs3)) ///
    (bar fraction_single_author xvalue if comment==1, barwidth(0.8) color(gs9%80)), ///
    plotregion(style(none)) scheme(s1color) title("") xtitle("") ///
    ytitle("Fraction of articles that are solo-authored") ///
    xlabel(-1.5 `" "' 0.5 `""Articles with at least" "one male author""' 2 `" "' 3.5 `""Articles with at least" "one female author""' 5.5 `" "', noticks labsize(medsmall)) ///
    ylabel(0 .10 `"0.10"' .20 `"0.20"' .30 `"0.30"' .4 `"0.40"' .5 `"0.50"', grid angle(horizontal) labsize(medsmall)) ///
    yscale(r(., .5)) ///
    legend(row(1) size(medsmall) order(1 "regular article" 3 "comment") region(lwidth(none)))
graph export "${output}/single_author_jama.eps", replace
graph export "${output}/single_author_jama.png", replace



//
// Nature

// call data
use "${data}/output/nature_data_gender.dta", clear
drop if year == 2020
keep if (comment | research_article)

// Reduce the author-level rows to one row per article, carrying along the
// number of author rows and the per-article totals of the `male' / `female'
// variables.
bysort article_id: egen number_authors = count(article_id)
bysort article_id: egen number_men = total(`male')
bysort article_id: egen number_women = total(`female')
generate number_identified_authors = number_men + number_women
collapse (firstnm) number_men number_women number_authors ///
    number_identified_authors year comment research_article, by(article_id)
generate single_authored = (number_authors == 1)

// Share of solo-authored articles by article type. comment is the only
// regressor, so margins returns one predicted value per level of comment;
// the first column is posted below as comment==0, the second as comment==1.
// articles with at least one male author
regress single_authored i.comment if number_men >= 1
margins comment
matrix m = r(b)
local share_men_regular = m[1,1]
local share_men_comment = m[1,2]
// articles with at least one female author
regress single_authored i.comment if number_women >= 1
margins comment
matrix f = r(b)
local share_women_regular = f[1,1]
local share_women_comment = f[1,2]

// Store the four shares as a small dataset; xvalue is the horizontal bar
// position used in the graph (0/1 for men, 3/4 for women).
tempname results
postfile `results' female comment xvalue fraction_single_author ///
    using "${data}/temporary/single_author_nature.dta", replace
post `results' (0) (0) (0) (`share_men_regular')
post `results' (0) (1) (1) (`share_men_comment')
post `results' (1) (0) (3) (`share_women_regular')
post `results' (1) (1) (4) (`share_women_comment')
postclose `results'

use "${data}/temporary/single_author_nature.dta", clear
format fraction_single_author %9.2f

// Bars for regular articles and comments, plus an invisible scatter layer
// that only contributes the value labels above each bar.
twoway ///
    (bar fraction_single_author xvalue if comment==0, barwidth(0.8) color(gs3%80)) ///
    (scatter fraction_single_author xvalue, msymbol(i) mlabel(fraction_single_author) mlabposition(12) mlabsize(medsmall) mlabcolor(gs3)) ///
    (bar fraction_single_author xvalue if comment==1, barwidth(0.8) color(gs9%80)), ///
    plotregion(style(none)) scheme(s1color) title("") xtitle("") ///
    ytitle("Fraction of articles that are solo-authored") ///
    xlabel(-1.5 `" "' 0.5 `""Articles with at least" "one male author""' 2 `" "' 3.5 `""Articles with at least" "one female author""' 5.5 `" "', noticks labsize(medsmall)) ///
    ylabel(0 .10 `"0.10"' .20 `"0.20"' .30 `"0.30"' .4 `"0.40"' .5 `"0.50"', grid angle(horizontal) labsize(medsmall)) ///
    yscale(r(., .5)) ///
    legend(row(1) size(medsmall) order(1 "regular article" 3 "comment") region(lwidth(none)))
graph export "${output}/single_author_nature.eps", replace
graph export "${output}/single_author_nature.png", replace



//
// PNAS

// call data
use "${data}/output/pnas_data_gender.dta", clear
// name suffixes erroneously scraped as separate author-article observations
drop if inlist(full_name, "II", "III", "IV", "Jr", "Jr.")
drop if year==2020 | year<2008  // PNAS started comments in 2008
keep if (comment | research_article)

// Reduce the author-level rows to one row per article, carrying along the
// number of author rows and the per-article totals of the `male' / `female'
// variables.
bysort article_id: egen number_authors = count(article_id)
bysort article_id: egen number_men = total(`male')
bysort article_id: egen number_women = total(`female')
generate number_identified_authors = number_men + number_women
collapse (firstnm) number_men number_women number_authors ///
    number_identified_authors year comment research_article, by(article_id)
generate single_authored = (number_authors == 1)

// Share of solo-authored articles by article type. comment is the only
// regressor, so margins returns one predicted value per level of comment;
// the first column is posted below as comment==0, the second as comment==1.
// articles with at least one male author
regress single_authored i.comment if number_men >= 1
margins comment
matrix m = r(b)
local share_men_regular = m[1,1]
local share_men_comment = m[1,2]
// articles with at least one female author
regress single_authored i.comment if number_women >= 1
margins comment
matrix f = r(b)
local share_women_regular = f[1,1]
local share_women_comment = f[1,2]

// Store the four shares as a small dataset; xvalue is the horizontal bar
// position used in the graph (0/1 for men, 3/4 for women).
tempname results
postfile `results' female comment xvalue fraction_single_author ///
    using "${data}/temporary/single_author_pnas.dta", replace
post `results' (0) (0) (0) (`share_men_regular')
post `results' (0) (1) (1) (`share_men_comment')
post `results' (1) (0) (3) (`share_women_regular')
post `results' (1) (1) (4) (`share_women_comment')
postclose `results'

use "${data}/temporary/single_author_pnas.dta", clear
format fraction_single_author %9.2f

// Bars for regular articles and comments, plus an invisible scatter layer
// that only contributes the value labels above each bar.
twoway ///
    (bar fraction_single_author xvalue if comment==0, barwidth(0.8) color(gs3%80)) ///
    (scatter fraction_single_author xvalue, msymbol(i) mlabel(fraction_single_author) mlabposition(12) mlabsize(medsmall) mlabcolor(gs3)) ///
    (bar fraction_single_author xvalue if comment==1, barwidth(0.8) color(gs9%80)), ///
    plotregion(style(none)) scheme(s1color) title("") xtitle("") ///
    ytitle("Fraction of articles that are solo-authored") ///
    xlabel(-1.5 `" "' 0.5 `""Articles with at least" "one male author""' 2 `" "' 3.5 `""Articles with at least" "one female author""' 5.5 `" "', noticks labsize(medsmall)) ///
    ylabel(0 .10 `"0.10"' .20 `"0.20"' .30 `"0.30"' .4 `"0.40"' .5 `"0.50"', grid angle(horizontal) labsize(medsmall)) ///
    yscale(r(., .5)) ///
    legend(row(1) size(medsmall) order(1 "regular article" 3 "comment") region(lwidth(none)))
graph export "${output}/single_author_pnas.eps", replace
graph export "${output}/single_author_pnas.png", replace



//
// Science

// call data
use "${data}/output/science_data_gender.dta", clear
drop if year == 2020
keep if (comment | research_article)

// Reduce the author-level rows to one row per article, carrying along the
// number of author rows and the per-article totals of the `male' / `female'
// variables.
bysort article_id: egen number_authors = count(article_id)
bysort article_id: egen number_men = total(`male')
bysort article_id: egen number_women = total(`female')
generate number_identified_authors = number_men + number_women
collapse (firstnm) number_men number_women number_authors ///
    number_identified_authors year comment research_article, by(article_id)
generate single_authored = (number_authors == 1)

// Share of solo-authored articles by article type. comment is the only
// regressor, so margins returns one predicted value per level of comment;
// the first column is posted below as comment==0, the second as comment==1.
// articles with at least one male author
regress single_authored i.comment if number_men >= 1
margins comment
matrix m = r(b)
local share_men_regular = m[1,1]
local share_men_comment = m[1,2]
// articles with at least one female author
regress single_authored i.comment if number_women >= 1
margins comment
matrix f = r(b)
local share_women_regular = f[1,1]
local share_women_comment = f[1,2]

// Store the four shares as a small dataset; xvalue is the horizontal bar
// position used in the graph (0/1 for men, 3/4 for women).
tempname results
postfile `results' female comment xvalue fraction_single_author ///
    using "${data}/temporary/single_author_science.dta", replace
post `results' (0) (0) (0) (`share_men_regular')
post `results' (0) (1) (1) (`share_men_comment')
post `results' (1) (0) (3) (`share_women_regular')
post `results' (1) (1) (4) (`share_women_comment')
postclose `results'

use "${data}/temporary/single_author_science.dta", clear
format fraction_single_author %9.2f

// Bars for regular articles and comments, plus an invisible scatter layer
// that only contributes the value labels above each bar.
twoway ///
    (bar fraction_single_author xvalue if comment==0, barwidth(0.8) color(gs3%80)) ///
    (scatter fraction_single_author xvalue, msymbol(i) mlabel(fraction_single_author) mlabposition(12) mlabsize(medsmall) mlabcolor(gs3)) ///
    (bar fraction_single_author xvalue if comment==1, barwidth(0.8) color(gs9%80)), ///
    plotregion(style(none)) scheme(s1color) title("") xtitle("") ///
    ytitle("Fraction of articles that are solo-authored") ///
    xlabel(-1.5 `" "' 0.5 `""Articles with at least" "one male author""' 2 `" "' 3.5 `""Articles with at least" "one female author""' 5.5 `" "', noticks labsize(medsmall)) ///
    ylabel(0 .10 `"0.10"' .20 `"0.20"' .30 `"0.30"' .4 `"0.40"' .5 `"0.50"', grid angle(horizontal) labsize(medsmall)) ///
    yscale(r(., .5)) ///
    legend(row(1) size(medsmall) order(1 "regular article" 3 "comment") region(lwidth(none)))
graph export "${output}/single_author_science.eps", replace
graph export "${output}/single_author_science.png", replace





